/*
This code produces simple summary statistics on the match rate for the data used for the 941 match
*/
/*
do "${dodir}/make_globals.do"

use "${datadir}/ppp_wide_dist" , clear


merge 1:m id using "${datadir}/f941", nogen keep(master matched)
*/

//Start with the regression which is limited on a number of dimensions from the raw file

use id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode st_mode ///
med_wage total_compensation num_employees g jobsreported_first using ${datadir}/reg_data, clear

// drop if two digit naics is missing
drop if naics_2_mode==. | naics_2_mode==0

// drop if state is missing
drop if st_mode==0 | st_mode==.	

// drop restuarants (special covid treatment), public admin, and utilities
keep if (naics_2_mode~=92 & naics_2_mode~=22 & naics_2_mode~=72 & naics_2_mode~=49 & naics_2_mode~=55)

// create min and max emp

g x_2019=num_employees if q>=5 & q<=8
	
egen mean_emp_2019=mean(x_2019), by(id)

drop x_2019

keep id q mean_emp_2019 min_emp_2020q1 jobsreported_first

save temp, replace

use temp, clear

//Matched at all in any quarter
gen ever_matched = jobsreported_first!= .

gen num = 1

//Generate firm size bins 10 employees wide

keep if inrange(num_employees, 295, 705) 
gen firm_size = .
local bin = 0
forvalues bin = 295(10)695 {
	replace firm_size = `bin' + 5 if jobsreported_first >= `bin' & jobsreported_first < `bin' + 10
	local bin = `bin' + 1
}

//Collapse to allow options for weighted and unweighted

gcollapse (sum) match_wt=ever_matched num_wt=num (rawsum) match_raw=ever_matched num_raw=num [pw=num_employees], by(firm_size)


//Divide match over total to get take up rates
gen uptake_raw = round(100*(match_raw / num_raw), .1)
gen uptake_wt = round(100*(match_wt / num_wt), .1)


//Now do some plotidg

line uptake_raw firm_size, /// 
	xtitle(Firm Size) xlabel(,nogrid) /// 
	xline(500) ///
	ytitle(% of Wage Payers) ylabel(,nogrid) /// 
	lcolor(navy)
	graph export $outdir/sum_stats/threshold_SBA_500.png, replace width(3000)

	
	
	
	
	
	
	
	
//Matched at all in any quarter

preserve 
gcollapse (firstnm) forgivenessamount_first jobsreported_first (max) ever_matched within_10_pct (mean) jobs_rate, by(id)
gcollapse (count) id (sum) forgivenessamount_first jobsreported_first, by(ever_matched)
export excel "${outdir}/sum_stats/match_941_summary.xlsx", firstrow(variables) sheet("Ever Matched", replace)
restore

//Matched by quarter in 2020

preserve 
gcollapse (count) id (sum) forgivenessamount_first jobsreported_first within_10_pct (median) jobs_rate, by(ever_matched quarter)
export excel "${outdir}/sum_stats/match_941_summary.xlsx", firstrow(variables) sheet("Ever Matched by Quarter", replace)
restore

//Collapse to allow options for weighted and unweighted

gcollapse (sum) match_wt=ever_matched num_wt=num (rawsum) match_raw=ever_matched num_raw=num [pw=num_employees], by(firm_size)


//Divide match over total to get take up rates
gen uptake_raw = round(100*(match_raw / num_raw), .1)
gen uptake_wt = round(100*(match_wt / num_wt), .1)


//Now do some plotidg

line uptake_raw firm_size, /// 
	xtitle(Firm Size) xlabel(,nogrid) /// 
	xline(500) ///
	ytitle(% of Wage Payers) ylabel(,nogrid) /// 
	lcolor(navy)
	graph export $outdir/sum_stats/threshold_500.png, replace width(3000)
